Predict the number of ship arrivals for the 14 days following the last known arrival date. I am using Facebook's Prophet model, which decomposes the time series (much like the Holt-Winters model) into trend, seasonality, etc., to predict the arrivals for the next two weeks.
Libraries used: pandas, fbprophet, folium (maps).
import pandas as pd
import numpy as np
import matplotlib.style as style
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from fbprophet import Prophet
from IPython.display import Image
import folium
# Apply the ggplot look to every matplotlib figure drawn below.
style.use('ggplot')

# Read the port ship-count table from disk.
ship_counts = pd.read_csv("data/port_counts/geohash_wwu_ports.csv")

# Show how many rows carry each cluster label, then preview the first rows.
label_counts = ship_counts["cluster_labels"].value_counts()
print(label_counts)
ship_counts.head()
# Coordinates (latitude, longitude) used both as the map centre and as the
# marker position.
port_lat, port_lon = 38.895642, 118.668746

# Dark base map centred on those coordinates.
map2 = folium.Map(
    location=[port_lat, port_lon],
    tiles='CartoDB dark_matter',
    zoom_start=11,
)

# Drop a marker with a popup label at the same coordinates.
port_marker = folium.Marker([port_lat, port_lon], popup='detected port location')
port_marker.add_to(map2)

# Last expression of the cell: renders the map in the notebook.
map2
from math import radians, cos, sin, asin, sqrt

# Sphere radius used by haversine(); the result is expressed in the same unit.
# NOTE(review): 6371.0088 is presumably the mean Earth radius in km - confirm.
R = 6371.0088


def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two points on a sphere of radius R.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.

    Returns:
        The central angle between the two points multiplied by ``R``, i.e. the
        distance in the same unit as ``R``.
    """
    dLat = radians(lat2 - lat1)
    dLon = radians(lon2 - lon1)
    lat1 = radians(lat1)
    lat2 = radians(lat2)
    a = sin(dLat/2)**2 + cos(lat1)*cos(lat2)*sin(dLon/2)**2
    # Floating-point rounding can push `a` marginally above 1.0 for antipodal
    # (or nearly antipodal) points, which would make asin() raise ValueError.
    a = min(1.0, a)
    c = 2*asin(sqrt(a))
    return R * c
# Rows whose cluster label is 0; filtered once and reused below.
port_rows = ship_counts.loc[ship_counts.cluster_labels == 0]

# Smallest and largest value of the date column for those rows.
print(port_rows.date.min(), port_rows.date.max())

# Distribution of the number_ships column for those rows.
port_rows.number_ships.hist()
Make predictions for the 14 days following the last known date in the dataset for the port.
# Build the Prophet training frame for the rows with cluster label 0.
# Rows and columns are selected in a single .loc call and the columns are
# renamed with a non-in-place rename(): the previous chained indexing
# (.loc[...][[...]]) followed by rename(inplace=True) mutated an intermediate
# selection, a pattern pandas discourages and may warn about.
# Prophet requires the input columns to be named "ds" (date) and "y" (value).
port_arrivals = ship_counts.loc[
    ship_counts.cluster_labels == 0, ["date", "number_ships"]
].rename(columns={"date": "ds", "number_ships": "y"})

# Fit the model on the historical counts.
m = Prophet()
m.fit(port_arrivals)

# Extend the history by 14 periods (Prophet's default frequency is daily)
# and predict over the history plus the extension.
future = m.make_future_dataframe(periods=14)
forecast = m.predict(future)

# Last expression of the cell: show the final rows of the forecast, i.e. the
# point estimate and its lower/upper uncertainty bounds.
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
from fbprophet.plot import plot_plotly
import plotly.offline as py

# Static matplotlib views: the forecast itself, then its components.
fig1 = m.plot(forecast)
fig2 = m.plot_components(forecast)

# Interactive view: enable plotly's notebook mode, build the plotly Figure
# from the fitted model and forecast, and render it inline.
py.init_notebook_mode()
fig = plot_plotly(m, forecast)
py.iplot(fig)